options(warn=-1)
if (!require("pacman"))
install.packages("pacman")
# use this line for installing/loading
pacman::p_load(tidyverse,
glue,
scales,
openintro,
gridExtra,
ggrepel,
ggmap,
ggridges,
dsbox,
devtools,
fs,
janitor,
here,
dplyr,
palmerpenguins,
stringr,
ggplot2,
plotly,
Hmisc,
ggExtra,
patchwork,
viridis
) HW 03
1 - Du Bois challenge.
income <- read_csv(here("data", "income.csv"),
show_col_types = FALSE)income <- income |>
mutate(
Rent_pct = Rent / 100,
Food_pct = Food / 100,
Clothes_pct = Clothes / 100,
Tax_pct = Tax / 100,
Other_pct = Other / 100
)income <- income %>%
mutate(Class = paste0(as.character(Class), " $", as.character(Average_Income)))income$Class <- factor(income$Class, levels = c("$1000 AND OVER $1125", "$750-1000 $880", "$500-750 $547", "$400-500 $433.82", "$300-400 $335.66", "$200-300 $249.45", "$100-200 $139.1"))Sources: https://stackoverflow.com/questions/18413756/re-ordering-factor-levels-in-data-frame
glimpse(income)Rows: 7
Columns: 12
$ Class <fct> $100-200 $139.1, $200-300 $249.45, $300-400 $335.…
$ Average_Income <dbl> 139.10, 249.45, 335.66, 433.82, 547.00, 880.00, 1125.00
$ Rent <dbl> 19, 22, 23, 18, 13, 0, 0
$ Food <dbl> 43, 47, 43, 37, 31, 37, 29
$ Clothes <dbl> 28, 23, 18, 15, 17, 19, 16
$ Tax <dbl> 9.9, 4.0, 4.5, 5.5, 5.0, 8.0, 4.5
$ Other <dbl> 0.1, 4.0, 11.5, 24.5, 34.0, 36.0, 50.5
$ Rent_pct <dbl> 0.19, 0.22, 0.23, 0.18, 0.13, 0.00, 0.00
$ Food_pct <dbl> 0.43, 0.47, 0.43, 0.37, 0.31, 0.37, 0.29
$ Clothes_pct <dbl> 0.28, 0.23, 0.18, 0.15, 0.17, 0.19, 0.16
$ Tax_pct <dbl> 0.099, 0.040, 0.045, 0.055, 0.050, 0.080, 0.045
$ Other_pct <dbl> 0.001, 0.040, 0.115, 0.245, 0.340, 0.360, 0.505
# Your original code, modified to include text labels
fig <- plot_ly(income, x = ~Rent, y = ~Class,
type = 'bar',
orientation = 'h',
name = 'Rent',
# --- Add these lines ---
text = ~Rent_pct, # Use the percentage column for the text data
textposition = 'inside',
texttemplate = '%{text:.0%}', # Format the text as a percentage with 0 decimal places
# ----------------------
marker = list(color = '#121210'))
fig <- fig %>% add_trace(x = ~Food, name = 'Food',
# --- Add these lines ---
text = ~Food_pct,
textposition = 'inside',
texttemplate = '%{text:.0%}',
# ----------------------
marker = list(color = '#7D5A7F'))
fig <- fig %>% add_trace(x = ~Clothes, name = 'Clothes',
# --- Add these lines ---
text = ~Clothes_pct,
textposition = 'inside',
texttemplate = '%{text:.0%}',
# ----------------------
marker = list(color = '#D79684'))
fig <- fig %>% add_trace(x = ~Tax, name = 'Tax',
# --- Add these lines ---
text = ~Tax_pct,
textposition = 'inside',
texttemplate = '%{text:.1%}',
# ----------------------
marker = list(color = '#003e80'))
fig <- fig %>% add_trace(x = ~Other, name = 'Other',
# --- Add these lines ---
text = ~Other_pct,
textposition = 'inside',
texttemplate = '%{text:.1%}',
# ----------------------
marker = list(color = '#e6f2ff'))
# Apply the layout (no changes needed here)
fig <- fig %>% layout(
barmode = 'stack',
title = "INCOME AND EXPENITURE OF 150 NEGRO FAMILIES IN ATLANTA, GA. U.S.A.",
titlefont = list(size = 15, color = "#000000"),
xaxis = list(
title = "FOR FUTHER STATISTICS RAISE THIS FRAME",
showticklabels = FALSE
),
annotations = list(
list(
x = -0.28,
y = 1.025,
text = "Class Actual Average",
showarrow = FALSE,
xref = "paper",
yref = "paper"
)
),
yaxis = list(title = ""),
showlegend = FALSE,
plot_bgcolor = "#CAB2A0", # inside plot area
paper_bgcolor = "#CAB2A0" # outside plot area
)
# Display the figure
figSources: https://plotly.com/r/horizontal-bar-charts/ https://plotly.com/r/text-and-annotations/
2 - COVID survey - interpret
The plot illustrates opinions made by several different groups regarding the COVID vaccine below are three observations I made: 1. Nurses seem to strongly recommend the vaccine, with a very small error bar, illustrating that most nurses are of the same mindset. In fact, most groups seem to recommend the vaccines except for a few groups, “Had COVID vaccine: No” and “Gender: Prefer not to say”. 2. Most groups were very confident in the scientific vetting the process for the new COVID vaccines, again, only a few groups showed disagreement to it, being the same groups disagreeing with the previous statement: “Had COVID vaccine: No” and “Gender: Prefer not to say”. 3. This time we could observe two very interesting groups, the “Had COVID vaccine” and the “Had flu vaccine this year” a. It is understandable that the people who had the COVID vaccine responded positively about the vaccine, like the people that had the flu vaccine this year, they mostly had positive feedback. b. On the other hand, the groups that did not have COVID vaccines had responses that were split straight in the middle, but with large amount of uncertainty. This illustrates that the consensus regarding the vaccine within that group is very broad, and that they may not have had the vaccines for a variety of reasons.
3 - COVID survey - reconstruct
covid_survey <- read_csv(here("data", "covid-survey.csv"),
show_col_types = FALSE, skip = 1)
glimpse(covid_survey)Rows: 1,121
Columns: 14
$ response_id <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,…
$ exp_profession <dbl> 1, 1, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ exp_flu_vax <dbl> 1, 1, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ exp_gender <dbl> 0, 1, NA, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, …
$ exp_race <dbl> 2, 2, NA, 5, 5, 5, 5, 5, 5, 2, 5, 5, 2, 5, 5, …
$ exp_ethnicity <dbl> 2, 2, NA, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
$ exp_age_bin <dbl> 25, 20, NA, 25, 25, 25, 25, 25, 20, 20, 20, 25…
$ exp_already_vax <dbl> 1, 1, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ resp_safety <dbl> 5, 5, NA, 5, 5, 5, 5, 4, 4, 5, 5, 5, 5, 5, 5, …
$ resp_confidence_science <dbl> 2, 1, NA, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 1, …
$ resp_concern_safety <dbl> 2, 1, NA, 1, 1, 1, 1, 4, 4, 1, 2, 2, 3, 1, 3, …
$ resp_feel_safe_at_work <dbl> 1, 1, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ resp_will_recommend <dbl> 1, 1, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ resp_trust_info <dbl> 1, 1, NA, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, …
print(
dim(covid_survey)
)[1] 1121 14
covid_survey <- covid_survey %>%
filter(if_all(-response_id, ~ !is.na(.)))
print(
dim(covid_survey)
)[1] 926 14
covid_survey <- covid_survey %>%
mutate(
exp_already_vax = ifelse(exp_already_vax == 0, "No", "Yes"),
exp_flu_vax = ifelse(exp_flu_vax == 0, "No", "Yes"),
exp_profession = ifelse(exp_profession == 0, "Medical", "Nursing"),
exp_gender = ifelse(exp_gender == 0, "Male",
ifelse(exp_gender == 1, "Female",
ifelse(exp_gender == 3, "Non-binary third gender", "Prefer not to say"))),
exp_race = ifelse(exp_race == 1, "American Indian / Alaskan Native",
ifelse(exp_race == 2, "Asian",
ifelse(exp_race == 3, "Black or African American",
ifelse(exp_race == 4, "Native Hawaiian / Other Pacific Islander", "White")))),
exp_ethnicity = ifelse(exp_ethnicity == 1, "Hispanic / Latino", "Non-Hispanic/Non-Latino"),
exp_age_bin = case_when(
exp_age_bin == 0 ~ "<20",
exp_age_bin == 20 ~ "21-25",
exp_age_bin == 25 ~ "26-30",
exp_age_bin == 30 ~ ">30"
)
)
print(
dim(covid_survey)
)[1] 926 14
covid_survey_longer <- covid_survey |>
pivot_longer(
cols = starts_with("exp_"),
names_to = "explanatory",
values_to = "explanatory_value"
) |>
filter(!is.na(explanatory_value)) |>
pivot_longer(
cols = starts_with("resp_"),
names_to = "response",
values_to = "response_value"
)
print(covid_survey_longer)# A tibble: 38,892 × 5
response_id explanatory explanatory_value response response_value
<dbl> <chr> <chr> <chr> <dbl>
1 1 exp_profession Nursing resp_safety 5
2 1 exp_profession Nursing resp_confidence_… 2
3 1 exp_profession Nursing resp_concern_saf… 2
4 1 exp_profession Nursing resp_feel_safe_a… 1
5 1 exp_profession Nursing resp_will_recomm… 1
6 1 exp_profession Nursing resp_trust_info 1
7 1 exp_flu_vax Yes resp_safety 5
8 1 exp_flu_vax Yes resp_confidence_… 2
9 1 exp_flu_vax Yes resp_concern_saf… 2
10 1 exp_flu_vax Yes resp_feel_safe_a… 1
# ℹ 38,882 more rows
covid_survey_summary_stats_by_group <- covid_survey_longer %>%
group_by(explanatory, explanatory_value, response) %>%
summarise(
mean = mean(as.numeric(response_value), na.rm = TRUE),
low = quantile(as.numeric(response_value), probs = 0.10, na.rm = TRUE),
high = quantile(as.numeric(response_value), probs = 0.90, na.rm = TRUE)
)
print(covid_survey_summary_stats_by_group)# A tibble: 126 × 6
# Groups: explanatory, explanatory_value [21]
explanatory explanatory_value response mean low high
<chr> <chr> <chr> <dbl> <dbl> <dbl>
1 exp_age_bin 21-25 resp_concern_safety 3.32 2 5
2 exp_age_bin 21-25 resp_confidence_science 1.30 1 2
3 exp_age_bin 21-25 resp_feel_safe_at_work 1.18 1 2
4 exp_age_bin 21-25 resp_safety 1.97 1 5
5 exp_age_bin 21-25 resp_trust_info 1.29 1 2
6 exp_age_bin 21-25 resp_will_recommend 1.09 1 1
7 exp_age_bin 26-30 resp_concern_safety 3.32 1 5
8 exp_age_bin 26-30 resp_confidence_science 1.39 1 2
9 exp_age_bin 26-30 resp_feel_safe_at_work 1.27 1 2
10 exp_age_bin 26-30 resp_safety 2.17 1 5
# ℹ 116 more rows
covid_survey_summary_stats_all <- covid_survey_longer %>%
group_by(response) %>%
summarise(
mean = mean(as.numeric(response_value), na.rm = TRUE),
low = quantile(as.numeric(response_value), probs = 0.10, na.rm = TRUE),
high = quantile(as.numeric(response_value), probs = 0.90, na.rm = TRUE),
explanatory = "All",
explanatory_value = ""
)
print(covid_survey_summary_stats_all)# A tibble: 6 × 6
response mean low high explanatory explanatory_value
<chr> <dbl> <dbl> <dbl> <chr> <chr>
1 resp_concern_safety 3.26 1 5 All ""
2 resp_confidence_science 1.43 1 2 All ""
3 resp_feel_safe_at_work 1.34 1 2 All ""
4 resp_safety 2.04 1 5 All ""
5 resp_trust_info 1.38 1 2 All ""
6 resp_will_recommend 1.21 1 2 All ""
covid_survey_summary_stats <- bind_rows(
covid_survey_summary_stats_by_group,
covid_survey_summary_stats_all
)
print(covid_survey_summary_stats)# A tibble: 132 × 6
# Groups: explanatory, explanatory_value [22]
explanatory explanatory_value response mean low high
<chr> <chr> <chr> <dbl> <dbl> <dbl>
1 exp_age_bin 21-25 resp_concern_safety 3.32 2 5
2 exp_age_bin 21-25 resp_confidence_science 1.30 1 2
3 exp_age_bin 21-25 resp_feel_safe_at_work 1.18 1 2
4 exp_age_bin 21-25 resp_safety 1.97 1 5
5 exp_age_bin 21-25 resp_trust_info 1.29 1 2
6 exp_age_bin 21-25 resp_will_recommend 1.09 1 1
7 exp_age_bin 26-30 resp_concern_safety 3.32 1 5
8 exp_age_bin 26-30 resp_confidence_science 1.39 1 2
9 exp_age_bin 26-30 resp_feel_safe_at_work 1.27 1 2
10 exp_age_bin 26-30 resp_safety 2.17 1 5
# ℹ 122 more rows
covid_survey_summary_stats <- covid_survey_summary_stats %>%
mutate(
explanatory = case_when(
explanatory == "exp_age_bin" ~ "Age",
explanatory == "exp_already_vax" ~ "Had COVID vaccine",
explanatory == "exp_flu_vax" ~ "Had flu vaccine this year",
explanatory == "exp_profession" ~ "Profession",
explanatory == "exp_gender" ~ "Gender",
explanatory == "exp_race" ~ "Race",
explanatory == "exp_ethnicity" ~ "Ethnicity",
explanatory == "All" ~ "All"
),
response = case_when(
response == "resp_safety" ~ "Based on my understanding, I believe the vaccine is safe",
response == "resp_confidence_science" ~ "I am confident in the scientific vetting process for the new COVID vaccines",
response == "resp_feel_safe_at_work" ~ "Getting the vaccine will make me feel safer at work",
response == "resp_will_recommend" ~ "I will recommend the vaccine to family, friends, and community members",
response == "resp_trust_info" ~ "I trust the information that I have received about the vaccines",
response == "resp_concern_safety" ~ "I am concerned about the safety and side effects of the vaccine"
)
)
print(covid_survey_summary_stats)# A tibble: 132 × 6
# Groups: explanatory, explanatory_value [22]
explanatory explanatory_value response mean low high
<chr> <chr> <chr> <dbl> <dbl> <dbl>
1 Age 21-25 I am concerned about the saf… 3.32 2 5
2 Age 21-25 I am confident in the scient… 1.30 1 2
3 Age 21-25 Getting the vaccine will mak… 1.18 1 2
4 Age 21-25 Based on my understanding, I… 1.97 1 5
5 Age 21-25 I trust the information that… 1.29 1 2
6 Age 21-25 I will recommend the vaccine… 1.09 1 1
7 Age 26-30 I am concerned about the saf… 3.32 1 5
8 Age 26-30 I am confident in the scient… 1.39 1 2
9 Age 26-30 Getting the vaccine will mak… 1.27 1 2
10 Age 26-30 Based on my understanding, I… 2.17 1 5
# ℹ 122 more rows
custom_order <- c("All", "Age", "Gender", "Race", "Ethnicity", "Profession", "Had COVID vaccine", "Had flu vaccine this year")
covid_survey_summary_stats$explanatory <- factor(covid_survey_summary_stats$explanatory, levels = custom_order)
covid_survey_summary_stats_sorted <- covid_survey_summary_stats[order(covid_survey_summary_stats$explanatory), ]
print(covid_survey_summary_stats_sorted)# A tibble: 132 × 6
# Groups: explanatory, explanatory_value [22]
explanatory explanatory_value response mean low high
<fct> <chr> <chr> <dbl> <dbl> <dbl>
1 All "" I am concerned about the saf… 3.26 1 5
2 All "" I am confident in the scient… 1.43 1 2
3 All "" Getting the vaccine will mak… 1.34 1 2
4 All "" Based on my understanding, I… 2.04 1 5
5 All "" I trust the information that… 1.38 1 2
6 All "" I will recommend the vaccine… 1.21 1 2
7 Age "21-25" I am concerned about the saf… 3.32 2 5
8 Age "21-25" I am confident in the scient… 1.30 1 2
9 Age "21-25" Getting the vaccine will mak… 1.18 1 2
10 Age "21-25" Based on my understanding, I… 1.97 1 5
# ℹ 122 more rows
custom_order <- c("", ">30", "26-30", "21-25", "<20", "Prefer not to say", "Non-binary third gender", "Male", "Female",
"White", "Native Hawaiian / Other Pacific Islander", "Black or African American", "Asian", "American Indian / Alaskan Native",
"Non-Hispanic/Non-Latino", "Hispanic / Latino", "Nursing", "Medical", "Yes", "No")
covid_survey_summary_stats_sorted$explanatory_value <- factor(covid_survey_summary_stats_sorted$explanatory_value, levels = custom_order)
covid_survey_summary_stats_sorted <- covid_survey_summary_stats_sorted[order(covid_survey_summary_stats_sorted$explanatory_value), ]
print(covid_survey_summary_stats_sorted)# A tibble: 132 × 6
# Groups: explanatory, explanatory_value [22]
explanatory explanatory_value response mean low high
<fct> <fct> <chr> <dbl> <dbl> <dbl>
1 All "" I am concerned about the saf… 3.26 1 5
2 All "" I am confident in the scient… 1.43 1 2
3 All "" Getting the vaccine will mak… 1.34 1 2
4 All "" Based on my understanding, I… 2.04 1 5
5 All "" I trust the information that… 1.38 1 2
6 All "" I will recommend the vaccine… 1.21 1 2
7 Age ">30" I am concerned about the saf… 3.02 1 5
8 Age ">30" I am confident in the scient… 1.69 1 3
9 Age ">30" Getting the vaccine will mak… 1.75 1 4
10 Age ">30" Based on my understanding, I… 1.83 1 4
# ℹ 122 more rows
ggplot(covid_survey_summary_stats_sorted, aes(x = mean, y = explanatory_value)) +
geom_point(position = position_dodge(width = 0.5)) +
geom_errorbarh(aes(xmin = low, xmax = high), height = 0.2, position = position_dodge(width = 0.5)) +
facet_grid(cols = vars(response),
rows = vars(explanatory),
labeller = labeller(response = label_wrap_gen(15),
explanatory = label_wrap_gen(15)),
space = "free_y",
scales = "free_y") +
labs(x = "Mean likert score \n (Error bars range from 10th to 90th percentile)", y = "") +
theme_minimal() +
theme(
strip.background = element_rect(fill = "gray90", color = "lightgray"),
strip.text.x = element_text(angle = 0),
strip.text.y = element_text(angle = 0)
) +
removeGrid()Overall, I do not see a great difference between the 10 - 90th percentile plot and the 25 - 75th percentile plot. the error bar on the 25 - 75th percentile plot is narrower, but I do not think it is adding much value to the plot that the 10 - 90th percentile plot already provides. Hence my conclusion still stands that the overall sentiment towards the COVID vaccine is positive.
4 - COVID survey - re-reconstruct
covid_survey_summary_stats_all <- covid_survey_longer %>%
group_by(response) %>%
summarise(
mean = mean(as.numeric(response_value), na.rm = TRUE),
low = quantile(as.numeric(response_value), probs = 0.25, na.rm = TRUE),
high = quantile(as.numeric(response_value), probs = 0.75, na.rm = TRUE),
explanatory = "All",
explanatory_value = ""
)
print(covid_survey_summary_stats_all)# A tibble: 6 × 6
response mean low high explanatory explanatory_value
<chr> <dbl> <dbl> <dbl> <chr> <chr>
1 resp_concern_safety 3.26 2 4 All ""
2 resp_confidence_science 1.43 1 2 All ""
3 resp_feel_safe_at_work 1.34 1 1 All ""
4 resp_safety 2.04 1 3 All ""
5 resp_trust_info 1.38 1 2 All ""
6 resp_will_recommend 1.21 1 1 All ""
covid_survey_summary_stats <- bind_rows(
covid_survey_summary_stats_by_group,
covid_survey_summary_stats_all
)
print(covid_survey_summary_stats)# A tibble: 132 × 6
# Groups: explanatory, explanatory_value [22]
explanatory explanatory_value response mean low high
<chr> <chr> <chr> <dbl> <dbl> <dbl>
1 exp_age_bin 21-25 resp_concern_safety 3.32 2 5
2 exp_age_bin 21-25 resp_confidence_science 1.30 1 2
3 exp_age_bin 21-25 resp_feel_safe_at_work 1.18 1 2
4 exp_age_bin 21-25 resp_safety 1.97 1 5
5 exp_age_bin 21-25 resp_trust_info 1.29 1 2
6 exp_age_bin 21-25 resp_will_recommend 1.09 1 1
7 exp_age_bin 26-30 resp_concern_safety 3.32 1 5
8 exp_age_bin 26-30 resp_confidence_science 1.39 1 2
9 exp_age_bin 26-30 resp_feel_safe_at_work 1.27 1 2
10 exp_age_bin 26-30 resp_safety 2.17 1 5
# ℹ 122 more rows
covid_survey_summary_stats <- covid_survey_summary_stats %>%
mutate(
explanatory = case_when(
explanatory == "exp_age_bin" ~ "Age",
explanatory == "exp_already_vax" ~ "Had COVID vaccine",
explanatory == "exp_flu_vax" ~ "Had flu vaccine this year",
explanatory == "exp_profession" ~ "Profession",
explanatory == "exp_gender" ~ "Gender",
explanatory == "exp_race" ~ "Race",
explanatory == "exp_ethnicity" ~ "Ethnicity",
explanatory == "All" ~ "All"
),
response = case_when(
response == "resp_safety" ~ "Based on my understanding, I believe the vaccine is safe",
response == "resp_confidence_science" ~ "I am confident in the scientific vetting process for the new COVID vaccines",
response == "resp_feel_safe_at_work" ~ "Getting the vaccine will make me feel safer at work",
response == "resp_will_recommend" ~ "I will recommend the vaccine to family, friends, and community members",
response == "resp_trust_info" ~ "I trust the information that I have received about the vaccines",
response == "resp_concern_safety" ~ "I am concerned about the safety and side effects of the vaccine"
)
)
print(covid_survey_summary_stats)# A tibble: 132 × 6
# Groups: explanatory, explanatory_value [22]
explanatory explanatory_value response mean low high
<chr> <chr> <chr> <dbl> <dbl> <dbl>
1 Age 21-25 I am concerned about the saf… 3.32 2 5
2 Age 21-25 I am confident in the scient… 1.30 1 2
3 Age 21-25 Getting the vaccine will mak… 1.18 1 2
4 Age 21-25 Based on my understanding, I… 1.97 1 5
5 Age 21-25 I trust the information that… 1.29 1 2
6 Age 21-25 I will recommend the vaccine… 1.09 1 1
7 Age 26-30 I am concerned about the saf… 3.32 1 5
8 Age 26-30 I am confident in the scient… 1.39 1 2
9 Age 26-30 Getting the vaccine will mak… 1.27 1 2
10 Age 26-30 Based on my understanding, I… 2.17 1 5
# ℹ 122 more rows
custom_order <- c("All", "Age", "Gender", "Race", "Ethnicity", "Profession", "Had COVID vaccine", "Had flu vaccine this year")
covid_survey_summary_stats$explanatory <- factor(covid_survey_summary_stats$explanatory, levels = custom_order)
covid_survey_summary_stats_sorted_2 <- covid_survey_summary_stats[order(covid_survey_summary_stats$explanatory), ]
print(covid_survey_summary_stats_sorted_2)# A tibble: 132 × 6
# Groups: explanatory, explanatory_value [22]
explanatory explanatory_value response mean low high
<fct> <chr> <chr> <dbl> <dbl> <dbl>
1 All "" I am concerned about the saf… 3.26 2 4
2 All "" I am confident in the scient… 1.43 1 2
3 All "" Getting the vaccine will mak… 1.34 1 1
4 All "" Based on my understanding, I… 2.04 1 3
5 All "" I trust the information that… 1.38 1 2
6 All "" I will recommend the vaccine… 1.21 1 1
7 Age "21-25" I am concerned about the saf… 3.32 2 5
8 Age "21-25" I am confident in the scient… 1.30 1 2
9 Age "21-25" Getting the vaccine will mak… 1.18 1 2
10 Age "21-25" Based on my understanding, I… 1.97 1 5
# ℹ 122 more rows
custom_order_2 <- c("", ">30", "26-30", "21-25", "<20", "Prefer not to say", "Non-binary third gender", "Male", "Female",
"White", "Native Hawaiian / Other Pacific Islander", "Black or African American", "Asian", "American Indian / Alaskan Native",
"Non-Hispanic/Non-Latino", "Hispanic / Latino", "Nursing", "Medical", "Yes", "No")
covid_survey_summary_stats_sorted_2$explanatory_value <- factor(covid_survey_summary_stats_sorted_2$explanatory_value, levels = custom_order_2)
covid_survey_summary_stats_sorted_2 <- covid_survey_summary_stats_sorted_2[order(covid_survey_summary_stats_sorted_2$explanatory_value), ]
print(covid_survey_summary_stats_sorted_2)# A tibble: 132 × 6
# Groups: explanatory, explanatory_value [22]
explanatory explanatory_value response mean low high
<fct> <fct> <chr> <dbl> <dbl> <dbl>
1 All "" I am concerned about the saf… 3.26 2 4
2 All "" I am confident in the scient… 1.43 1 2
3 All "" Getting the vaccine will mak… 1.34 1 1
4 All "" Based on my understanding, I… 2.04 1 3
5 All "" I trust the information that… 1.38 1 2
6 All "" I will recommend the vaccine… 1.21 1 1
7 Age ">30" I am concerned about the saf… 3.02 1 5
8 Age ">30" I am confident in the scient… 1.69 1 3
9 Age ">30" Getting the vaccine will mak… 1.75 1 4
10 Age ">30" Based on my understanding, I… 1.83 1 4
# ℹ 122 more rows
ggplot(covid_survey_summary_stats_sorted_2, aes(x = mean, y = explanatory_value)) +
geom_point(position = position_dodge(width = 0.5)) +
geom_errorbarh(aes(xmin = low, xmax = high), height = 0.2, position = position_dodge(width = 0.5)) +
facet_grid(cols = vars(response),
rows = vars(explanatory),
labeller = labeller(response = label_wrap_gen(15),
explanatory = label_wrap_gen(15)),
space = "free_y",
scales = "free_y") +
labs(x = "Mean likert score \n (Error bars range from 25th to 75th percentile)", y = "") +
theme_minimal() +
theme(
strip.background = element_rect(fill = "gray90", color = "lightgray"),
strip.text.x = element_text(angle = 0),
strip.text.y = element_text(angle = 0)
) +
removeGrid()5 - COVID survey - another view
covid_survey_longer <- covid_survey_longer %>%
mutate(
response_value = as.numeric(response_value),
explanatory = case_when(
explanatory == "exp_age_bin" ~ "Age",
explanatory == "exp_already_vax" ~ "Had COVID vaccine",
explanatory == "exp_flu_vax" ~ "Had flu vaccine this year",
explanatory == "exp_profession" ~ "Profession",
explanatory == "exp_gender" ~ "Gender",
explanatory == "exp_race" ~ "Race",
explanatory == "exp_ethnicity" ~ "Ethnicity"
),
response = case_when(
response == "resp_safety" ~ "Based on my understanding, I believe the vaccine is safe",
response == "resp_confidence_science" ~ "I am confident in the scientific vetting process for the new COVID vaccines",
response == "resp_feel_safe_at_work" ~ "Getting the vaccine will make me feel safer at work",
response == "resp_will_recommend" ~ "I will recommend the vaccine to family, friends, and community members",
response == "resp_trust_info" ~ "I trust the information that I have received about the vaccines",
response == "resp_concern_safety" ~ "I am concerned about the safety and side effects of the vaccine"
)
)
print(covid_survey_longer)# A tibble: 38,892 × 5
response_id explanatory explanatory_value response response_value
<dbl> <chr> <chr> <chr> <dbl>
1 1 Profession Nursing Based o… 5
2 1 Profession Nursing I am co… 2
3 1 Profession Nursing I am co… 2
4 1 Profession Nursing Getting… 1
5 1 Profession Nursing I will … 1
6 1 Profession Nursing I trust… 1
7 1 Had flu vaccine this y… Yes Based o… 5
8 1 Had flu vaccine this y… Yes I am co… 2
9 1 Had flu vaccine this y… Yes I am co… 2
10 1 Had flu vaccine this y… Yes Getting… 1
# ℹ 38,882 more rows
covid_survey_longer_grouped <- covid_survey_longer %>%
group_by(response, response_value) %>%
summarise(total = sum(response_value), .groups = "drop") %>%
group_by(response) %>%
mutate(pct = total / sum(total)) %>%
ungroup()
print(covid_survey_longer_grouped)# A tibble: 30 × 4
response response_value total pct
<chr> <dbl> <dbl> <dbl>
1 Based on my understanding, I believe the vaccine… 1 3920 0.296
2 Based on my understanding, I believe the vaccine… 2 1848 0.139
3 Based on my understanding, I believe the vaccine… 3 693 0.0523
4 Based on my understanding, I believe the vaccine… 4 980 0.0740
5 Based on my understanding, I believe the vaccine… 5 5810 0.438
6 Getting the vaccine will make me feel safer at w… 1 5306 0.611
7 Getting the vaccine will make me feel safer at w… 2 1316 0.152
8 Getting the vaccine will make me feel safer at w… 3 672 0.0774
9 Getting the vaccine will make me feel safer at w… 4 336 0.0387
10 Getting the vaccine will make me feel safer at w… 5 1050 0.121
# ℹ 20 more rows
covid_survey_longer_grouped <- covid_survey_longer_grouped %>%
mutate(
response_value = as.character(response_value),
response_value = case_when(
response_value == "1" ~ "Strongly Agree",
response_value == "2" ~ "Somewhat Agree",
response_value == "3" ~ "Neither Agree Nor Disagree",
response_value == "4" ~ "Somewhat Disagree",
response_value == "5" ~ "Strongly Disagree"
)
)custom_order_3 <- c("Strongly Disagree", "Somewhat Disagree", "Neither Agree Nor Disagree", "Somewhat Agree", "Strongly Agree")
covid_survey_longer_grouped$response_value <- factor(covid_survey_longer_grouped$response_value, levels = custom_order_3)
covid_survey_longer_grouped <- covid_survey_longer_grouped[order(covid_survey_longer_grouped$response_value), ]
print(covid_survey_longer_grouped)# A tibble: 30 × 4
response response_value total pct
<chr> <fct> <dbl> <dbl>
1 Based on my understanding, I believe the vaccine… Strongly Disa… 5810 0.438
2 Getting the vaccine will make me feel safer at w… Strongly Disa… 1050 0.121
3 I am concerned about the safety and side effects… Strongly Disa… 7245 0.342
4 I am confident in the scientific vetting process… Strongly Disa… 630 0.0682
5 I trust the information that I have received abo… Strongly Disa… 385 0.0429
6 I will recommend the vaccine to family, friends,… Strongly Disa… 490 0.0627
7 Based on my understanding, I believe the vaccine… Somewhat Disa… 980 0.0740
8 Getting the vaccine will make me feel safer at w… Somewhat Disa… 336 0.0387
9 I am concerned about the safety and side effects… Somewhat Disa… 7812 0.369
10 I am confident in the scientific vetting process… Somewhat Disa… 588 0.0636
# ℹ 20 more rows
ggplot(covid_survey_longer_grouped, aes(x = response_value, y = pct)) +
geom_col() +
coord_flip() +
facet_grid(rows = vars(response),
labeller = labeller(response = label_wrap_gen(15),
explanatory = label_wrap_gen(15)),
space = "free_y",
scales = "free_y") +
theme(
strip.background = element_rect(fill = "gray90", color = "lightgray"),
strip.text.y = element_text(angle = 0)
) +
labs(title = "Percentage of Votes to COVID Questions",
x = "COVID Survey Questions",
y = "Percent of votes per Response Value",
fill = "Response Value")+
scale_y_continuous(labels = scales::percent)ggplot(covid_survey_longer_grouped, aes(y = pct, fill = response_value, x = "")) +
coord_flip() +
geom_bar(position="stack", stat="identity") +
scale_fill_viridis_d(option = "D") +
labs(title = "Percentage of Votes to COVID Questions",
x = "COVID Survey Questions",
y = "Percent of votes per Response Value",
fill = "Response Value") +
facet_grid(rows = vars(response),
labeller = labeller(response = label_wrap_gen(15),
explanatory = label_wrap_gen(15)),
space = "free_y",
scales = "free_y") +
theme(
strip.background = element_rect(fill = "gray90", color = "lightgray"),
strip.text.y = element_text(angle = 0)
)+
scale_y_continuous(labels = scales::percent)Both the diverging bar chart and the stacked bar chart provide a clear understanding of the sentiment towards the COVID vaccine. The diverging bar plot shows that the “Strongly Agree” responses dominate on the plot. Although the stack bar chart illustrates the same information, it also shows that in certain cases, the sentiment is neutral overall. In other words, every answers are equally distributed, which a bit harder to see in the diverging chart.